In [1]:
import pandas as pd
import numpy as np
from IPython.core.display import display
import sys
sys.path.append('/Users/kyohei/code/myPackages')
import stock
from tqdm import tqdm
import math

import matplotlib
import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'AppleGothic'
%matplotlib inline
from IPython.core.display import display

import plotly
from plotly import tools
import plotly.graph_objs as go
import plotly.graph_objects as go
plotly.offline.init_notebook_mode(connected=False)

from pypfopt import expected_returns
from pypfopt import risk_models
from pypfopt.efficient_frontier import EfficientFrontier    

from tslearn.utils import to_time_series_dataset
from tslearn.clustering import KShape
In [2]:
import pickle
def pickle_dump(obj, path):
    with open(path, mode='wb') as f:
        pickle.dump(obj,f)
def pickle_load(path):
    with open(path, mode='rb') as f:
        data = pickle.load(f)
        return data
    
# print(pickle_load('funds.pickle'))
In [3]:
funds = pickle_load('funds.pickle') #stock.getFunds()
fee_df= pd.read_csv('../fee.csv', index_col=0)
df = pd.read_csv('../funds.csv', index_col=0)
In [4]:
def getTmpDf(jp_name=False, year=None, possession=False):
    df = pd.read_csv('../funds.csv', index_col=0)
    df.columns = [c.split('_')[-1] for c in df.columns]
    if possession == True:
        df = df.loc[:,list(possession_funds.keys())]
    if year is not None:
        df = df.query('"{}-12-31" < date < "{}-01-01"'.format(year-1, year+1))
    if jp_name == True:
        df.columns = list(map(lambda x: funds[x]['fund_name'], list(df.columns)))
    return df


def buyFunds(target_fund, yaer, timing, amount_plan):
    """
    IN  : TargetFundDataFrame, BuyingTiming, BuyingAmountsList
    OUT : df
    """
    profit_df = pd.DataFrame(columns=['month', 'amount', 'amount_sum' , 'quantity',
                                      'purchase_price', 'evaluation_price',
                                      'profit', 'actual_profit'])
    profit_df['month'] = [i for i in range(1,13)]
    profit_df['amount'] = amount_plan
    profit_df['amount_sum'] = profit_df['amount'].cumsum()
    profit_df = profit_df.set_index('month')
    
    query = '"{yaer}-{:0=2}-{:0=2}" <= date < "{yaer}-{:0=2}-{:0=2}"'.replace('{yaer}', str(yaer))
    for i, amount in enumerate(profit_df['amount']):
        if amount == 0:
            continue
            
        i = i+1
        target_range = pd.concat([target_fund.query(query.format(i, timing, i, timing+10)),
                                  target_fund.query(query.format(i+1, 1,  i+1, 1+10))])
        purchase_price = target_range.iat[0, 0]
        profit_df.loc[i, ['purchase_price']] = target_range.iat[0, 0]
        profit_df.loc[i, ['quantity']] = math.ceil(profit_df.loc[i, ['amount']]['amount'] /
                                                   (profit_df.loc[i, ['purchase_price']]['purchase_price']/10000))
        profit_df.loc[i, ['evaluation_price']] = target_fund.query(query.format(i, 1, i, 31)).iat[-1, 0]
        
    profit_df['quantity'] = profit_df['quantity'].cumsum()
    profit_df['profit'] = profit_df['quantity'] * (profit_df['evaluation_price']/10000)
    profit_df['actual_profit'] = profit_df['quantity'] * (profit_df['evaluation_price']/10000) - profit_df['amount_sum'] 

    return profit_df


def plotEfficientFrontier(df):
    # calc return & risk
    mu = expected_returns.mean_historical_return(df)
    S = risk_models.sample_cov(df)

    # Portfolio optimization
    ef = EfficientFrontier(mu, S)
    trets = np.arange(0.1, 0.3, 0.01)
    tvols = []
    for tr in trets:
        w = ef.efficient_return(target_return=tr)
        w = pd.Series(w).values
        v = np.sqrt(np.dot(w.T, np.dot(np.array(S), w)))
        tvols += [v]
    plt.style.use('ggplot')
    fig = plt.figure(figsize=(16, 8))
    ax = fig.add_subplot(1, 1, 1)
    ax.scatter(tvols, trets, marker='x')
    ax.set_xlim([0, 0.15])
    ax.set_ylim([0, 0.3])
    ax.set_xlabel('Volatility')
    ax.set_ylabel('Expected return')
    ax.grid(True)
    plt.show()
    return ef

遷移

In [5]:
df_tmp = getTmpDf(year=2019).sort_index()
data = []
for fund in df_tmp.columns:
    data.append(go.Scatter(x=df_tmp.index, y=df_tmp[fund], name=funds[fund.split('_')[-1]]['fund_name']))

plotly.offline.iplot(data, filename='')

基準値遷移

In [6]:
df_tmp = getTmpDf(year=2019).sort_index()
data = []
for fund in df_tmp.columns:
    df_tmp[fund] = df_tmp[fund].apply(lambda x: x / df_tmp[fund][0])  
    data.append(go.Scatter(x=df_tmp.index, y=df_tmp[fund], name=funds[fund.split('_')[-1]]['fund_name']))

plotly.offline.iplot(data, filename='')

偏差値

In [7]:
df_tmp = getTmpDf(year=2019).sort_index()
rm_col = list(df_tmp.isnull().sum()[df_tmp.isnull().sum()>0].index)
df_tmp = df_tmp.drop(rm_col, axis=1)
df_tmp = df_tmp.apply(lambda x: x[-1] - x[0])
df_tmp = df_tmp.map(lambda x: (x - df_tmp.mean()) / df_tmp.std()*10+50).sort_values()[::-1]

# paint
colors = ['lightslategray',] * len(list(df_tmp.index))
tmp = list(map(lambda x : funds[x.split('_')[-1]]['fund_name'], list(df_tmp.index)))
# for i in [tmp.index(v) for v in possession_funds_old.values()]:
#     colors[i] = 'crimson'

fig = go.Figure(data=[
    go.Bar(name='',
           x=list(map(lambda x : funds[x.split('_')[-1]]['fund_name'], list(df_tmp.index))),
           y=list(df_tmp.values),
          marker_color=colors
          )])
fig.show()

利益

In [8]:
df_tmp = getTmpDf(year=2019).sort_index()
rm_col = list(df_tmp.isnull().sum()[df_tmp.isnull().sum()>0].index)
df_tmp = df_tmp.drop(rm_col, axis=1)
amount_plan = [10000]*12
data = []

profit_df = pd.DataFrame(columns=list(df_tmp.columns))
for fund in profit_df.columns:
    profit_df[fund] = buyFunds(df_tmp.loc[:,[fund]], 2019, 1, amount_plan).loc[:,'actual_profit']
    data.append(go.Scatter(x=profit_df.index, y=profit_df[fund], name=funds[fund.split('_')[-1]]['fund_name']))

plotly.offline.iplot(data, filename='')

手数料考慮利益

In [9]:
df_tmp = getTmpDf(year=2019).sort_index()
rm_col = list(df_tmp.isnull().sum()[df_tmp.isnull().sum()>0].index)
df_tmp = df_tmp.drop(rm_col, axis=1)
amount_plan = [10000]*12

profit_df = pd.DataFrame(columns=list(df_tmp.columns))
for fund in profit_df.columns:
    profit_df[fund] = buyFunds(df_tmp.loc[:,[fund]], 2019, 1, amount_plan).loc[:,'profit']
    profit_df[fund] = profit_df[fund] - profit_df[fund] * (fee_df['fee'][fund]/100)
    
profit_df['amount_sum'] = amount_plan
profit_df['amount_sum'] = profit_df['amount_sum'].cumsum()
profit_df = profit_df.apply(lambda x:x-profit_df['amount_sum'])
profit_df = profit_df.drop('amount_sum', axis=1)

data = []
for fund in profit_df.columns:
    data.append(go.Scatter(x=profit_df.index, y=profit_df[fund], name=funds[fund.split('_')[-1]]['fund_name']))
plotly.offline.iplot(data, filename='')
In [10]:
# rank
profit_rank = profit_df.loc[12,:].sort_values()[:-11:-1]

layout = plotly.graph_objs.Layout({
   'xaxis': {'title': 'TOP10 Profit'},
   'yaxis': {'title': 'Profit', 'range': [profit_rank.values[-1]*0.9, profit_rank.values[0]]},
   'barmode': 'relative'}
)

fig = go.Figure(data=[go.Bar(
            x=[funds[x]['fund_name'] for x in profit_rank.index], y=profit_rank.values,
            text= profit_rank.values,
            textposition='auto',
        )],
        layout=layout)

fig.show()

効率的フロンティア

In [11]:
df_tmp = getTmpDf(jp_name=True, year=2019).sort_index()
ef = plotEfficientFrontier(df_tmp)
In [12]:
df_tmp = pd.DataFrame.from_dict(ef.min_volatility(), orient='index',columns=['amount'])
df_tmp.sort_values('amount', ascending=False)[:10]
Out[12]:
amount
たわらノーロード 最適化バランス(保守型) 0.173943
auスマート・ベーシック(安定) 0.170890
auスマート・ベーシック(安定成長) 0.136521
DCニッセイワールドセレクトファンド(安定型) 0.110739
東京海上・円資産インデックスバランスファンド 0.098244
たわらノーロード バランス(堅実型) 0.098002
ニッセイ・インデックスパッケージ(国内・株式/リート/債券) 0.091481
たわらノーロード 最適化バランス(安定型) 0.053900
Smart-i 8資産バランス 安定型 0.034233
eMAXIS最適化バランス(マイゴールキーパー) 0.027479

リターンとリスク

In [13]:
df_tmp = getTmpDf(jp_name=True, year=2019)

mu, S = expected_returns.mean_historical_return(df_tmp), risk_models.sample_cov(df_tmp)
df_tmp = pd.DataFrame(mu, columns=['return'])
df_tmp['risk'] = np.diag(S.values, k=0)
df_tmp = df_tmp.sort_values('return')

fig = go.Figure(data=[
    go.Bar(name='return', x=list(df_tmp.index), y=df_tmp['return'].values),
    go.Bar(name='risk', x=list(df_tmp.index), y=df_tmp['risk'].values)
])
fig.update_layout(barmode='group')
fig.show()

時系列クラスタリング

In [14]:
to_time_series_dataset(getTmpDf(jp_name=True, year=2019).values)
ks = KShape(n_clusters=10)
rs = ks.fit_predict(to_time_series_dataset(getTmpDf(year=2019).dropna(axis='columns').pct_change().dropna(how='all').T.values))

plot_df = pd.DataFrame(None, columns=getTmpDf(year=2019).dropna(axis='columns').columns).T
plot_df['return'] = getTmpDf(year=2019).pct_change().dropna(how='all').mean()
plot_df['risk'] = getTmpDf(year=2019).pct_change().dropna(how='all').std()
plot_df['name'] = list(map(lambda x: funds[x]['fund_name'], list(plot_df.index)))

group = [[] for i in range(10)]
for c, i in zip(rs, getTmpDf(year=2019).dropna(axis='columns').columns):
    group[c].append(i)
plot_df['group'] = None
for i, g in enumerate(group):
    plot_df.loc[g, ['group']] = str(i) 

import plotly.express as px
fig = px.scatter(plot_df, x='risk', y='return', color='group', hover_data=['name'])
fig.show()
In [ ]: